# Start from a clean workspace: remove every object returned by ls().
# Note that this only deletes objects; it does not detach packages or
# reset options set earlier in the session.
rm(list = ls())
# pacman supplies p_load(), which is used to load the packages below.
library(pacman)
# readxl -> read_excel(); dplyr -> mutate/across/select/filter/joins;
# irr -> kripp.alpha()
p_load(readxl, dplyr, irr)

################## CHINA ############################

## Load and clean coder 1's full coding

# Read data
coder1 <- read_excel("../data/china_original.xlsx")

# Remove unwanted naming rows carried over from the Excel file.
# The row positions are hard-coded, so they depend on the current layout
# of the workbook.
coder1 <- coder1[-c(1, 2, 127, 422), ]

# Convert Excel numeric (serial) dates to Date format
coder1$date <- as.numeric(coder1$date)
coder1$date <- as.Date(coder1$date, origin = "1899-12-30")

# Convert columns 4 to 105 to numeric
coder1 <- coder1 %>%
  mutate(across(4:105, as.numeric))

# Replace NA values with 0 (applies to every column of the data frame)
coder1[is.na(coder1)] <- 0

# Create summary columns for Mao and others.
# In R `&` binds tighter than `|`, so the union of suffixes must be
# parenthesised before Mao's columns are excluded. Without the parentheses
# only the last ends_with() term is filtered and Mao_1 / Mao_2 are counted
# in the "others" sums.
coder1 <- coder1 %>%
  mutate(
    Mao_symbolic = rowSums(across(c(Mao_2, Mao_3))),
    Mao_total = rowSums(across(c(Mao_1, Mao_2, Mao_3))),
    others_symbolic = rowSums(across(
      (ends_with("_2") | ends_with("_3")) & !matches("Mao")
    )),
    others_non_symbolic = rowSums(across(ends_with("_1") & !matches("Mao"))),
    others_total = rowSums(across(
      (ends_with("_1") | ends_with("_2") | ends_with("_3")) & !matches("Mao")
    ))
  )

## Load and clean coder 2's coding of 20 randomly chosen issues

coder2 <- read_excel("../data/china_coder2.xlsx")

# Convert date column to Date
coder2$date <- as.Date(coder2$date)

# Remove first row
coder2 <- coder2[-1, ]

# Convert columns 6 to 107 to numeric
coder2 <- coder2 %>%
  mutate(across(6:107, as.numeric))

# Replace NA values with 0 (applies to every column of the data frame)
coder2[is.na(coder2)] <- 0

# Create the same summary columns for Mao and others
# (same parenthesised selection as for coder 1, so both coders are
# aggregated over identical column sets)
coder2 <- coder2 %>%
  mutate(
    Mao_symbolic = rowSums(across(c(Mao_2, Mao_3))),
    Mao_total = rowSums(across(c(Mao_1, Mao_2, Mao_3))),
    others_symbolic = rowSums(across(
      (ends_with("_2") | ends_with("_3")) & !matches("Mao")
    )),
    others_non_symbolic = rowSums(across(ends_with("_1") & !matches("Mao"))),
    others_total = rowSums(across(
      (ends_with("_1") | ends_with("_2") | ends_with("_3")) & !matches("Mao")
    ))
  )


## Prepare datasets for comparison

# Keep the comparison columns, restrict coder 1 to the dates coder 2 also
# coded, and rename Mao_1 to Mao_non_symbolic
coder1_select <- coder1 %>%
  select(date, Mao_1, Mao_symbolic, Mao_total, others_symbolic, others_non_symbolic, others_total) %>%
  filter(date %in% coder2$date) %>%
  rename(Mao_non_symbolic = Mao_1)

coder2_select <- coder2 %>%
  select(date, Mao_1, Mao_symbolic, Mao_total, others_symbolic, others_non_symbolic, others_total) %>%
  rename(Mao_non_symbolic = Mao_1)


## Combine datasets by date for rating comparison

combined <- inner_join(coder1_select, coder2_select, by = "date", suffix = c("_coder1", "_coder2"))


## Calculate Krippendorff's alpha

# Stack ratings from both coders: one row per coder, one column per issue
Mao_symbolic_ratings <- rbind(
  combined$Mao_symbolic_coder1,
  combined$Mao_symbolic_coder2
)

# Calculate alpha
alpha_mao_symbolic <- kripp.alpha(Mao_symbolic_ratings, method = "ratio")$value


## Calculate alphas for all variables

vars <- c("Mao_symbolic", "Mao_non_symbolic", "Mao_total", "others_symbolic", "others_non_symbolic", "others_total")

# vapply() guarantees a named numeric vector with one alpha per variable
alphas_china <- vapply(vars, function(v) {
  ratings <- rbind(combined[[paste0(v, "_coder1")]], combined[[paste0(v, "_coder2")]])
  kripp.alpha(ratings, method = "ratio")$value
}, numeric(1))

alphas_china

################## INDONESIA ############################

## Load and clean the original coding (done by 2 people)

coder1 <- read_excel("../data/indonesia_original_coder1.xlsx")
coder2 <- read_excel("../data/indonesia_original_coder2.xlsx")

# Keep the rows flagged with 1 in each file's own indicator column.
# The indicator column has the same name as the data frame, so the .data
# pronoun is used to make explicit that the column is meant.
coder1_filtered <- coder1 %>% filter(.data$coder1 == 1)
coder2_filtered <- coder2 %>% filter(.data$coder2 == 1)

# Combine the filtered data frames
coder1and2 <- bind_rows(coder2_filtered, coder1_filtered)

# Convert date column to Date
coder1and2$date <- as.Date(coder1and2$date)

# Convert columns 7 to 180 to numeric
coder1and2 <- coder1and2 %>%
  mutate(across(7:180, as.numeric))

# Keep only the date and the numeric coding columns
coder1and2 <- coder1and2 %>%
  select(date, 7:180)

# Replace NA values with 0
coder1and2[is.na(coder1and2)] <- 0

# Create same summary columns
# Note: In the excel Indonesia coding file with summary columns:
# "_mention" means category 1
# "_symbol" means category 2
# "_propaganda" means category 3

coder1and2 <- coder1and2 %>%
  mutate(
    sukarno_symbolic = rowSums(across(c(leader_symbol, leader_propaganda))),
    sukarno_total = rowSums(across(c(leader_symbol, leader_propaganda, leader_mention))),
    others_symbolic = rowSums(across(c(other_symbol, other_propaganda))),
    others_total = rowSums(across(c(other_symbol, other_propaganda, other_mention)))
  )

## Load and clean coder 3's coding of 20 randomly chosen issues

coder3 <- read_excel("../data/indonesia_coder3.xlsx")

# Convert date column to Date
coder3$date <- as.Date(coder3$date)

# Convert columns 6 to 119 to numeric
coder3 <- coder3 %>%
  mutate(across(6:119, as.numeric))

# Keep only the date and the numeric coding columns
coder3 <- coder3 %>%
  select(date, 6:119)

# Replace NA values with 0
coder3[is.na(coder3)] <- 0

# Create the same summary columns for Sukarno and others
# Note: In the excel Indonesia coding file with summary columns:
# "_mention" means category 1
# "_symbol" means category 2
# "_propaganda" means category 3

coder3 <- coder3 %>%
  mutate(
    sukarno_symbolic = rowSums(across(c(leader_symbol, leader_propaganda))),
    sukarno_total = rowSums(across(c(leader_symbol, leader_propaganda, leader_mention))),
    others_symbolic = rowSums(across(c(other_symbol, other_propaganda))),
    others_total = rowSums(across(c(other_symbol, other_propaganda, other_mention)))
  )

## Prepare datasets for comparison

# Keep the comparison columns, restrict the original coding to the dates
# coder 3 also coded, and rename the "_mention" columns to "_non_symbolic"
coder1and2_select <- coder1and2 %>%
  select(date, sukarno_total, sukarno_symbolic, leader_mention, others_symbolic, others_total, other_mention) %>%
  filter(date %in% coder3$date) %>%
  rename(sukarno_non_symbolic = leader_mention,
         others_non_symbolic = other_mention)

coder3_select <- coder3 %>%
  select(date, sukarno_symbolic, sukarno_total, leader_mention, others_symbolic, others_total, other_mention) %>%
  rename(sukarno_non_symbolic = leader_mention,
         others_non_symbolic = other_mention)


## Combine datasets by date for rating comparison

# "_c1" = original coding (coders 1 and 2), "_c2" = coder 3
combined <- inner_join(coder1and2_select, coder3_select, by = "date", suffix = c("_c1", "_c2"))


## Calculate Krippendorff's alpha

# Stack ratings from the original coding and coder 3:
# one row per rater, one column per issue
sukarno_symbolic_ratings <- rbind(
  combined$sukarno_symbolic_c1,
  combined$sukarno_symbolic_c2
)

# Calculate alpha
alpha_sukarno_symbolic <- kripp.alpha(sukarno_symbolic_ratings, method = "ratio")$value


## Calculate alphas for all variables

vars <- c("sukarno_symbolic", "sukarno_non_symbolic", "sukarno_total", "others_symbolic", "others_non_symbolic", "others_total")

# vapply() guarantees a named numeric vector with one alpha per variable
alphas_indonesia <- vapply(vars, function(v) {
  ratings <- rbind(combined[[paste0(v, "_c1")]], combined[[paste0(v, "_c2")]])
  kripp.alpha(ratings, method = "ratio")$value
}, numeric(1))

alphas_indonesia

################## VIETNAM ############################

## Load and clean coder 1's full coding

# Read data
coder1 <- read_excel("../data/vietnam_original.xlsx")

# Convert columns 3 to 36 to numeric
coder1 <- coder1 %>%
  mutate(across(3:36, as.numeric))

# Replace NA values with 0 (applies to every column of the data frame)
coder1[is.na(coder1)] <- 0

# Create summary columns for Ho Chi Minh and others.
# In R `&` binds tighter than `|`, so the union of suffixes must be
# parenthesised before HCM's columns are excluded. Without the parentheses
# only the last ends_with() term is filtered and HCM_1 / HCM_2 are counted
# in the "others" sums.
coder1 <- coder1 %>%
  mutate(
    HCM_symbolic = rowSums(across(c(HCM_2, HCM_3))),
    HCM_total = rowSums(across(c(HCM_1, HCM_2, HCM_3))),
    others_symbolic = rowSums(across(
      (ends_with("_2") | ends_with("_3")) & !matches("HCM")
    )),
    others_non_symbolic = rowSums(across(ends_with("_1") & !matches("HCM"))),
    others_total = rowSums(across(
      (ends_with("_1") | ends_with("_2") | ends_with("_3")) & !matches("HCM")
    ))
  )

## Load and clean coder 2's coding of 20 randomly chosen issues

coder2 <- read_excel("../data/vietnam_coder2.xlsx")

# Remove first column (note: this drops a column, not a row).
# NOTE(review): presumably coder 2's file has one extra leading column, so
# that columns 3 to 36 line up with coder 1's afterwards -- confirm against
# the workbook.
coder2 <- coder2[, -1]

# Convert columns 3 to 36 to numeric
coder2 <- coder2 %>%
  mutate(across(3:36, as.numeric))

# Replace NA values with 0 (applies to every column of the data frame)
coder2[is.na(coder2)] <- 0

# Create the same summary columns for Ho Chi Minh and others
# (same parenthesised selection as for coder 1, so both coders are
# aggregated over identical column sets)
coder2 <- coder2 %>%
  mutate(
    HCM_symbolic = rowSums(across(c(HCM_2, HCM_3))),
    HCM_total = rowSums(across(c(HCM_1, HCM_2, HCM_3))),
    others_symbolic = rowSums(across(
      (ends_with("_2") | ends_with("_3")) & !matches("HCM")
    )),
    others_non_symbolic = rowSums(across(ends_with("_1") & !matches("HCM"))),
    others_total = rowSums(across(
      (ends_with("_1") | ends_with("_2") | ends_with("_3")) & !matches("HCM")
    ))
  )


## Prepare datasets for comparison

# Keep the comparison columns, restrict coder 1 to the dates coder 2 also
# coded, and rename HCM_1 to HCM_non_symbolic
coder1_select <- coder1 %>%
  select(Date, HCM_1, HCM_symbolic, HCM_total, others_symbolic, others_non_symbolic, others_total) %>%
  filter(Date %in% coder2$Date) %>%
  rename(HCM_non_symbolic = HCM_1)

coder2_select <- coder2 %>%
  select(Date, HCM_1, HCM_symbolic, HCM_total, others_symbolic, others_non_symbolic, others_total) %>%
  rename(HCM_non_symbolic = HCM_1)


## Combine datasets by date for rating comparison

combined <- inner_join(coder1_select, coder2_select, by = "Date", suffix = c("_coder1", "_coder2"))


## Calculate Krippendorff's alpha

# Stack ratings from both coders: one row per coder, one column per issue
HCM_symbolic_ratings <- rbind(
  combined$HCM_symbolic_coder1,
  combined$HCM_symbolic_coder2
)

# Calculate alpha
alpha_HCM_symbolic <- kripp.alpha(HCM_symbolic_ratings, method = "ratio")$value


## Calculate alphas for all variables

vars <- c("HCM_symbolic", "HCM_non_symbolic", "HCM_total", "others_symbolic", "others_non_symbolic", "others_total")

# vapply() guarantees a named numeric vector with one alpha per variable
alphas_vn <- vapply(vars, function(v) {
  ratings <- rbind(combined[[paste0(v, "_coder1")]], combined[[paste0(v, "_coder2")]])
  kripp.alpha(ratings, method = "ratio")$value
}, numeric(1))

alphas_vn

########################### OUTPUT ##############################

# Row labels, listed in the same order as the `vars` vectors used for each
# country (leader: symbolic, non-symbolic, total; then the same for others).
category <- c(
  "Symbolic (leader)",
  "Non-symbolic (leader)",
  "Total (leader)",
  "Symbolic (others)",
  "Non-symbolic (others)",
  "Total (others)"
)

# One column of alphas per country, rounded to two decimals; the variable
# names are stripped so rows are matched to `category` by position only.
alphas <- data.frame(
  category = category,
  Vietnam = round(unname(alphas_vn), 2),
  China = round(unname(alphas_china), 2),
  Indonesia = round(unname(alphas_indonesia), 2)
)

# Write the appendix table (write.csv defaults, so row names are included)
write.csv(alphas, file = "../output/appendix_tabG1_intercoder_reliability.csv")
